In this exercise we will look at how to use the camera's projection matrix, building on the camera pose transformation from the previous post.
First, instead of computing the transformation matrix by hand as before, we will use the more convenient look_at
function: given the camera position, the target position, and the up direction, it returns the camera's transformation matrix.
def look_at(camera_pos, target_pos, up_vec):
    # Compute the forward, right, and up vectors and normalize them
    forward = target_pos - camera_pos
    forward = forward / np.linalg.norm(forward)
    right = np.cross(up_vec, forward)
    right = right / np.linalg.norm(right)
    # Recompute the true up vector so the three axes are orthogonal
    up = np.cross(forward, right)
    up = up / np.linalg.norm(up)
    # Assemble the rotation part: the columns are the camera axes in world coordinates
    rotation_matrix = np.eye(4, dtype=np.float32)
    rotation_matrix[:3, 0] = right    # First column is the right vector (x-axis)
    rotation_matrix[:3, 1] = up       # Second column is the up vector (y-axis)
    rotation_matrix[:3, 2] = forward  # Third column is the forward vector (z-axis)
    # Create the translation part (camera position in world space)
    translation_matrix = np.eye(4, dtype=np.float32)
    translation_matrix[:3, 3] = camera_pos  # Set camera position in the translation part
    # Combine the rotation and translation into the camera-to-world matrix
    camera_to_world_matrix = translation_matrix @ rotation_matrix
    return camera_to_world_matrix
The computation looks a bit involved, but it mainly boils down to finding the camera's right, up, and forward directions as unit vectors. Once two of them are known, the cross product gives the third, perpendicular one; the three vectors are then assembled into a rotation matrix, and adding the camera position yields the camera's transformation matrix.
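For example, here is a quick standalone check (a minimal sketch, not part of the original script) that the three axes returned by look_at are orthonormal and that the last column stores the camera position:

import numpy as np

camera_pos = np.array([3.0, 3.0, 3.0])
target_pos = np.array([0.0, 0.0, 0.0])
up_vec = np.array([0.0, 0.0, -1.0])
m = look_at(camera_pos, target_pos, up_vec)
R = m[:3, :3]
# The rotation block should satisfy R @ R.T == I and det(R) == 1
print(np.allclose(R @ R.T, np.eye(3), atol=1e-6))    # True
print(np.isclose(np.linalg.det(R), 1.0, atol=1e-6))  # True
# The last column is the camera position in world coordinates
print(m[:3, 3])  # [3. 3. 3.]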
Now suppose we add a 3D cube with side length 1, centered at the origin. We place the camera at camera_pos=(3, 3, 3)
and have it look at the origin (0, 0, 0).
In this world the +z direction points up, but we want the camera's y-axis to point downward (toward -z) after the transformation, so we set up_vec=(0, 0, -1).
With these inputs we obtain the camera's transformation matrix.
import sys
import cv2
import numpy as np
from PIL import Image
from vispy import app, scene, visuals
# Create canvas
canvas = scene.SceneCanvas(title="vispy tutorial", keys="interactive", show=True)
# Make color white
canvas.bgcolor = "white"
# Create view and set the viewing camera
view = canvas.central_widget.add_view()
view.camera = "turntable"
view.camera.fov = 50
view.camera.distance = 10

def create_frustum(aspect_ratio=1.3, camera_to_world=np.eye(4)):
    objects = []  # Record all the objects created in this function
    center = np.array([0, 0, 0])
    points = np.array([
        [0.5, 0.5, 1],
        [0.5, -0.5, 1],
        [-0.5, -0.5, 1],
        [-0.5, 0.5, 1],
    ])
    points[:, 0] *= aspect_ratio
    for i in range(4):
        line = scene.visuals.Line(pos=np.array([center, points[i]]), color="red", antialias=True, width=2, parent=view.scene)
        objects.append(line)
        line = scene.visuals.Line(pos=np.array([points[i], points[(i + 1) % 4]]), color="red", antialias=True, width=2, parent=view.scene)
        objects.append(line)
    camera_axis = scene.visuals.XYZAxis(parent=view.scene, width=2, antialias=True)
    objects.append(camera_axis)
    # Create the semi-transparent plane
    plane = scene.visuals.Polygon(pos=points, color=(1, 0, 0, 0.5), parent=view.scene)
    # Here the z-axis of the plane is ignored, so we need to translate it
    plane.transform = scene.transforms.MatrixTransform()
    plane.transform.translate([0, 0, 1])
    objects.append(plane)
    new_transform = scene.transforms.MatrixTransform()
    new_transform.matrix = camera_to_world.T  # NOTE: MatrixTransform expects the transposed (row-vector) layout, so we pass the transpose
    for object in objects:
        object.transform = new_transform * object.transform

def look_at(camera_pos, target_pos, up_vec):
    # Compute the forward, right, and up vectors and normalize them
    forward = target_pos - camera_pos
    forward = forward / np.linalg.norm(forward)
    right = np.cross(up_vec, forward)
    right = right / np.linalg.norm(right)
    # Recompute the real up vector
    up = np.cross(forward, right)
    up = up / np.linalg.norm(up)
    rotation_matrix = np.eye(4, dtype=np.float32)
    rotation_matrix[:3, 0] = right    # First column is the right vector (x-axis)
    rotation_matrix[:3, 1] = up       # Second column is the up vector (y-axis)
    rotation_matrix[:3, 2] = forward  # Third column is the forward vector (z-axis)
    # Create the translation part (camera position in world space)
    translation_matrix = np.eye(4, dtype=np.float32)
    translation_matrix[:3, 3] = camera_pos  # Set camera position in the translation part
    # Combine the rotation and translation into the world matrix
    camera_to_world_matrix = translation_matrix @ rotation_matrix
    return camera_to_world_matrix
camera_pos = np.array([3, 3, 3])
target_pos = np.array([0, 0, 0])
up_vec = np.array([0, 0, -1])
camera_to_world = look_at(camera_pos, target_pos, up_vec)
create_frustum(camera_to_world=camera_to_world)
world_axis = scene.visuals.XYZAxis(parent=view.scene, width=2, antialias=True)
world_cube = scene.visuals.Cube(size=1.0, edge_color=[0.0, 0.0, 0.0], color=[0.5, 0.5, 0.5], parent=view.scene)

if __name__ == "__main__":
    if sys.flags.interactive != 1:
        app.run()
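Before projecting, we can sanity-check the pose (a short sketch reusing the variables from the script above, not part of the original code): the third column of the rotation block is the camera's forward axis expressed in world coordinates, and it should point from the camera toward the cube at the origin.

# Forward axis of the camera, expressed in world coordinates
forward_world = camera_to_world[:3, 2]
# Direction from the camera toward the target, normalized
expected = (target_pos - camera_pos) / np.linalg.norm(target_pos - camera_pos)
print(np.allclose(forward_world, expected, atol=1e-6))  # Expected: True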
If we want to project this cube onto a 2D image plane, we can use the camera's projection matrix. First we define a project_to_camera
function that takes the camera's transformation matrix, the focal lengths, the principal point, and the image height and width. The rough steps are as follows:
def project_to_camera(camera_to_world, fx, fy, cx, cy, height, width):
    # Define the cube vertices and faces
    cube_vertices = np.array([
        [-0.5, -0.5, 0.5],
        [0.5, -0.5, 0.5],
        [0.5, 0.5, 0.5],
        [-0.5, 0.5, 0.5],
        [-0.5, -0.5, -0.5],
        [0.5, -0.5, -0.5],
        [0.5, 0.5, -0.5],
        [-0.5, 0.5, -0.5],
    ])
    cube_faces = np.array([
        [0, 1, 2, 3],
        [4, 5, 6, 7],
        [0, 1, 5, 4],
        [2, 3, 7, 6],
        [0, 3, 7, 4],
        [1, 2, 6, 5],
    ])
    world_to_camera = np.linalg.inv(camera_to_world)
    # Create the intrinsic matrix
    intrinsic_matrix = np.eye(4)
    intrinsic_matrix[0, 0] = fx
    intrinsic_matrix[1, 1] = fy
    intrinsic_matrix[0, 2] = cx
    intrinsic_matrix[1, 2] = cy
    # Build the homogeneous coordinates from the vertices
    points = np.ones((cube_vertices.shape[0], 4))
    points[:, :3] = cube_vertices
    #### KEY PART ####
    # Transform the vertices from world coordinates into the camera frame
    points_in_camera_frame = world_to_camera @ points.T
    # Apply the intrinsics, then divide by depth (perspective division)
    points_projected = intrinsic_matrix @ points_in_camera_frame
    points_projected = points_projected.T
    points_projected = points_projected[:, :2] / (points_projected[:, 2:3] + 1e-6)
    ##################
    # Draw the cube edges in the image
    image = np.zeros((height, width, 3), dtype=np.uint8)
    for face in cube_faces:
        for i in range(4):
            start = points_projected[face[i]]
            end = points_projected[face[(i + 1) % 4]]
            start = start.astype(np.int32)
            end = end.astype(np.int32)
            image = cv2.line(image, tuple(start), tuple(end), (255, 255, 255), 2, cv2.LINE_AA)
    image = Image.fromarray(image)
    image.save("cube.png")

project_to_camera(
    camera_to_world=camera_to_world,
    fx=400,
    fy=400,
    cx=320,
    cy=240,
    height=480,
    width=640,
)
Running this produces the 2D projected image:
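As an extra sanity check (a minimal sketch reusing the same intrinsics, fx = fy = 400, cx = 320, cy = 240), a single cube vertex can also be projected by hand with the pinhole formula u = fx * X / Z + cx, v = fy * Y / Z + cy, where (X, Y, Z) is the vertex in camera coordinates:

world_to_camera = np.linalg.inv(camera_to_world)
vertex = np.array([0.5, 0.5, 0.5, 1.0])  # One cube corner in homogeneous coordinates
X, Y, Z, _ = world_to_camera @ vertex    # The same corner expressed in the camera frame
u = 400 * X / Z + 320
v = 400 * Y / Z + 240
print(u, v)  # Should match the corresponding corner drawn in cube.png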